suppressMessages(
c(library(Seurat),
library(tidyverse),
library(Matrix),
library(RCurl),
library(scales),
library(cowplot),
library(SingleCellExperiment),
library(AnnotationHub),
library(ensembldb),
library(gridExtra))
)
## [1] "gridExtra" "ensembldb" "AnnotationFilter" "GenomicFeatures" "AnnotationDbi"
## [6] "AnnotationHub" "BiocFileCache" "dbplyr" "SingleCellExperiment" "SummarizedExperiment"
## [11] "DelayedArray" "matrixStats" "Biobase" "GenomicRanges" "GenomeInfoDb"
## [16] "IRanges" "S4Vectors" "BiocGenerics" "parallel" "stats4"
## [21] "cowplot" "scales" "RCurl" "Matrix" "forcats"
## [26] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [31] "tibble" "ggplot2" "tidyverse" "Seurat" "stats"
## [36] "graphics" "grDevices" "utils" "datasets" "methods"
## [41] "base" "gridExtra" "ensembldb" "AnnotationFilter" "GenomicFeatures"
## [46] "AnnotationDbi" "AnnotationHub" "BiocFileCache" "dbplyr" "SingleCellExperiment"
## [51] "SummarizedExperiment" "DelayedArray" "matrixStats" "Biobase" "GenomicRanges"
## [56] "GenomeInfoDb" "IRanges" "S4Vectors" "BiocGenerics" "parallel"
## [61] "stats4" "cowplot" "scales" "RCurl" "Matrix"
## [66] "forcats" "stringr" "dplyr" "purrr" "readr"
## [71] "tidyr" "tibble" "ggplot2" "tidyverse" "Seurat"
## [76] "stats" "graphics" "grDevices" "utils" "datasets"
## [81] "methods" "base" "gridExtra" "ensembldb" "AnnotationFilter"
## [86] "GenomicFeatures" "AnnotationDbi" "AnnotationHub" "BiocFileCache" "dbplyr"
## [91] "SingleCellExperiment" "SummarizedExperiment" "DelayedArray" "matrixStats" "Biobase"
## [96] "GenomicRanges" "GenomeInfoDb" "IRanges" "S4Vectors" "BiocGenerics"
## [101] "parallel" "stats4" "cowplot" "scales" "RCurl"
## [106] "Matrix" "forcats" "stringr" "dplyr" "purrr"
## [111] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [116] "Seurat" "stats" "graphics" "grDevices" "utils"
## [121] "datasets" "methods" "base" "gridExtra" "ensembldb"
## [126] "AnnotationFilter" "GenomicFeatures" "AnnotationDbi" "AnnotationHub" "BiocFileCache"
## [131] "dbplyr" "SingleCellExperiment" "SummarizedExperiment" "DelayedArray" "matrixStats"
## [136] "Biobase" "GenomicRanges" "GenomeInfoDb" "IRanges" "S4Vectors"
## [141] "BiocGenerics" "parallel" "stats4" "cowplot" "scales"
## [146] "RCurl" "Matrix" "forcats" "stringr" "dplyr"
## [151] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [156] "tidyverse" "Seurat" "stats" "graphics" "grDevices"
## [161] "utils" "datasets" "methods" "base" "gridExtra"
## [166] "ensembldb" "AnnotationFilter" "GenomicFeatures" "AnnotationDbi" "AnnotationHub"
## [171] "BiocFileCache" "dbplyr" "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"
## [176] "matrixStats" "Biobase" "GenomicRanges" "GenomeInfoDb" "IRanges"
## [181] "S4Vectors" "BiocGenerics" "parallel" "stats4" "cowplot"
## [186] "scales" "RCurl" "Matrix" "forcats" "stringr"
## [191] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [196] "ggplot2" "tidyverse" "Seurat" "stats" "graphics"
## [201] "grDevices" "utils" "datasets" "methods" "base"
## [206] "gridExtra" "ensembldb" "AnnotationFilter" "GenomicFeatures" "AnnotationDbi"
## [211] "AnnotationHub" "BiocFileCache" "dbplyr" "SingleCellExperiment" "SummarizedExperiment"
## [216] "DelayedArray" "matrixStats" "Biobase" "GenomicRanges" "GenomeInfoDb"
## [221] "IRanges" "S4Vectors" "BiocGenerics" "parallel" "stats4"
## [226] "cowplot" "scales" "RCurl" "Matrix" "forcats"
## [231] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [236] "tibble" "ggplot2" "tidyverse" "Seurat" "stats"
## [241] "graphics" "grDevices" "utils" "datasets" "methods"
## [246] "base" "gridExtra" "ensembldb" "AnnotationFilter" "GenomicFeatures"
## [251] "AnnotationDbi" "AnnotationHub" "BiocFileCache" "dbplyr" "SingleCellExperiment"
## [256] "SummarizedExperiment" "DelayedArray" "matrixStats" "Biobase" "GenomicRanges"
## [261] "GenomeInfoDb" "IRanges" "S4Vectors" "BiocGenerics" "parallel"
## [266] "stats4" "cowplot" "scales" "RCurl" "Matrix"
## [271] "forcats" "stringr" "dplyr" "purrr" "readr"
## [276] "tidyr" "tibble" "ggplot2" "tidyverse" "Seurat"
## [281] "stats" "graphics" "grDevices" "utils" "datasets"
## [286] "methods" "base" "gridExtra" "ensembldb" "AnnotationFilter"
## [291] "GenomicFeatures" "AnnotationDbi" "AnnotationHub" "BiocFileCache" "dbplyr"
## [296] "SingleCellExperiment" "SummarizedExperiment" "DelayedArray" "matrixStats" "Biobase"
## [301] "GenomicRanges" "GenomeInfoDb" "IRanges" "S4Vectors" "BiocGenerics"
## [306] "parallel" "stats4" "cowplot" "scales" "RCurl"
## [311] "Matrix" "forcats" "stringr" "dplyr" "purrr"
## [316] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [321] "Seurat" "stats" "graphics" "grDevices" "utils"
## [326] "datasets" "methods" "base" "gridExtra" "ensembldb"
## [331] "AnnotationFilter" "GenomicFeatures" "AnnotationDbi" "AnnotationHub" "BiocFileCache"
## [336] "dbplyr" "SingleCellExperiment" "SummarizedExperiment" "DelayedArray" "matrixStats"
## [341] "Biobase" "GenomicRanges" "GenomeInfoDb" "IRanges" "S4Vectors"
## [346] "BiocGenerics" "parallel" "stats4" "cowplot" "scales"
## [351] "RCurl" "Matrix" "forcats" "stringr" "dplyr"
## [356] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [361] "tidyverse" "Seurat" "stats" "graphics" "grDevices"
## [366] "utils" "datasets" "methods" "base" "gridExtra"
## [371] "ensembldb" "AnnotationFilter" "GenomicFeatures" "AnnotationDbi" "AnnotationHub"
## [376] "BiocFileCache" "dbplyr" "SingleCellExperiment" "SummarizedExperiment" "DelayedArray"
## [381] "matrixStats" "Biobase" "GenomicRanges" "GenomeInfoDb" "IRanges"
## [386] "S4Vectors" "BiocGenerics" "parallel" "stats4" "cowplot"
## [391] "scales" "RCurl" "Matrix" "forcats" "stringr"
## [396] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [401] "ggplot2" "tidyverse" "Seurat" "stats" "graphics"
## [406] "grDevices" "utils" "datasets" "methods" "base"
This is using data from "T Helper Cell Cytokines Modulate Intestinal Stem Cell Renewal and Differentiation", Biton et al.,2018. The data was used to generate Figure 5E of the paper.
seurat_data <- read.table("Data/Fig5E_Log2TPM.txt.gz", sep = "\t")
rownames(seurat_data) <- seurat_data[,1]
colnames(seurat_data) <- seurat_data[1,]
seurat_data <- seurat_data[,-1]
seurat_data <- seurat_data[-1,]
seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features = 100)
## Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
# Add number of genes per UMI for each cell to metadata
seurat_obj$log10GenesPerUMI <- log10(seurat_obj$nFeature_RNA) / log10(seurat_obj$nCount_RNA)
# Compute percent mito ratio
seurat_obj$mitoRatio <- PercentageFeatureSet(object = seurat_obj, pattern = "^mt-")
seurat_obj$mitoRatio <- seurat_obj@meta.data$mitoRatio / 100
# Create metadata dataframe
metadata <- seurat_obj@meta.data
# Add cell IDs to metadata
metadata$cells <- rownames(metadata)
# Rename columns
metadata <- metadata %>%
dplyr::rename(seq_folder = orig.ident,
nUMI = nCount_RNA,
nGene = nFeature_RNA)
metadata <- separate(metadata, cells, c("barcode", "CD45", "genotype", "sample", "condition", "orig.annotation"), sep = "_", remove = FALSE, convert = TRUE)
metadata$CD45 <- NULL
# Add metadata back to Seurat object
seurat_obj@meta.data <- metadata
seurat <- subset(x = seurat_obj, subset = genotype == "MHCIIFlox", invert = FALSE)
seurat$nCount_RNA <- NULL
seurat$nFeature_RNA <- NULL
The filtering is done using standard criteria, nUMI >= 500, nGene >= 250, log10GenesPerUMI > 0.8, and mitoRatio < 0.2.
Also any genes that are expressed in less than 10 cells were also filtered out.
This is more just to check since I believe this data has already been filtered by the authors.
# need to correct the sample column of the metadata
metadata <- seurat@meta.data
metadata$sample <- paste(metadata$condition, metadata$sample, sep = "-")
seurat@meta.data <- metadata
# Visualize the number of cell counts per sample
metadata %>%
ggplot(aes(x=sample, fill=sample)) +
geom_bar() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells")
# Visualize the number UMIs/transcripts per cell
metadata %>%
ggplot(aes(color=sample, x=nUMI, fill= sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
ylab("Cell density") +
geom_vline(xintercept = 500) +
facet_wrap(.~condition)
# Visualize the distribution of genes detected per cell via density plot
metadata %>%
ggplot(aes(color=sample, x=nGene, fill= sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
geom_vline(xintercept = 300) +
facet_wrap(.~condition)
# Visualize the distribution of genes detected per cell via violin plot
metadata %>%
ggplot(aes(x=sample, y=log10(nGene), fill=sample)) +
geom_violin(aes(fill = NULL, color = sample)) +
geom_boxplot(width = 0.1) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells vs NGenes")
# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
metadata %>%
ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black", limits = c(0, 0.4)) +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~sample)
## `geom_smooth()` using formula 'y ~ x'
# Visualize the distribution of mitochondrial gene expression detected per cell
metadata %>%
ggplot(aes(color=sample, x=mitoRatio, fill=sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
geom_vline(xintercept = 0.2) +
facet_wrap(.~condition)
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 97 rows containing non-finite values (stat_density).
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
metadata %>%
ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
geom_vline(xintercept = 0.8) +
facet_wrap(.~condition)
filtered_seurat <- subset(x = seurat,
subset= (nUMI >= 500) &
(nGene >= 250) &
(log10GenesPerUMI > 0.80) &
(mitoRatio < 0.20))
# Extract counts
counts <- GetAssayData(object = filtered_seurat, slot = "counts")
# Output a logical vector for every gene on whether the more than zero counts per cell
nonzero <- counts > 0
# Sums all TRUE values and returns TRUE if more than 10 TRUE values per gene
keep_genes <- Matrix::rowSums(nonzero) >= 10
# Only keeping those genes expressed in more than 10 cells
filtered_counts <- counts[keep_genes, ]
# Reassign to filtered Seurat object
filtered_seurat <- CreateSeuratObject(filtered_counts, meta.data = filtered_seurat@meta.data)
Visualize the filtered data to confirm successful filtering.
## Save filtered subset to new metadata
metadata_clean <- filtered_seurat@meta.data
grid.arrange(
## Visualize the number of cell counts per sample
metadata_clean %>%
ggplot(aes(x=sample, fill=sample)) +
geom_bar() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells"),
# Visualize the distribution of genes detected per cell via density plot
metadata_clean %>%
ggplot(aes(color=sample, x=nGene, fill= sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
scale_x_log10() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
geom_vline(xintercept = 300) +
facet_wrap(.~condition),
# Visualize the distribution of genes detected per cell via violin plot
metadata_clean %>%
ggplot(aes(x=sample, y=log10(nGene), fill=sample)) +
geom_violin(aes(fill = NULL, color = sample)) +
geom_boxplot(width = 0.1) +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells vs NGenes"),
# Visualize the correlation between genes detected and number of UMIs and determine whether strong presence of cells with low numbers of genes/UMIs
metadata_clean %>%
ggplot(aes(x=nUMI, y=nGene, color=mitoRatio)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black", limits = c(0, 0.4)) +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~sample),
# Visualize the distribution of mitochondrial gene expression detected per cell
metadata_clean %>%
ggplot(aes(color=sample, x=mitoRatio, fill=sample)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
theme_classic() +
geom_vline(xintercept = 0.2) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(.~condition),
# Visualize the overall complexity of the gene expression by visualizing the genes detected per UMI
metadata_clean %>%
ggplot(aes(x=log10GenesPerUMI, color = sample, fill=sample)) +
geom_density(alpha = 0.2) +
theme_classic() +
geom_vline(xintercept = 0.8) +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
facet_wrap(.~condition),
nrow = 3
)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 97 rows containing non-finite values (stat_density).
# Create .RData object to load at any time
save(filtered_seurat, file="Data/seurat_filtered.RData")
We want to first explore if cells cluster by cell cycle. The cell cycle markers were generated using information from the HBC training material.
# Normalize the counts for cell cycle phase exploration
seurat_phase <- NormalizeData(filtered_seurat)
# make a list of cell cycle markers for mouse
#symbol_2_ensembl <- read_csv("Data/mouse_symbol_Ensembl.csv")
#mouse_cycle_marker <- read_csv("Data/Mus_musculus.csv")
#mouse_cycle_marker <- inner_join(mouse_cycle_marker, symbol_2_ensembl[,c(2,4)], by = c("geneID" = "Ensembl Gene ID") )
#g2m_genes <- mouse_cycle_marker %>% dplyr::filter(phase == "G2/M")%>% dplyr::select(`Marker Symbol`) %>% unlist()
#s_genes <- mouse_cycle_marker %>% dplyr::filter(phase == "S")%>% dplyr::select(`Marker Symbol`) %>% unlist()
#save(g2m_genes, s_genes, file = "Data/mouse_cycle maker.rda")
# Load cell cycle markers
load("Data/mouse_cycle maker.rda")
# Score cells for cell cycle
seurat_phase <- CellCycleScoring(seurat_phase,
g2m.features = g2m_genes,
s.features = s_genes)
## Warning: The following features are not present in the object: Jpt1, Pimreg, not searching for symbol synonyms
# Identify the most variable genes
seurat_phase <- FindVariableFeatures(seurat_phase,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
# Scale the counts
seurat_phase <- ScaleData(seurat_phase)
## Centering and scaling data matrix
# Perform PCA
seurat_phase <- RunPCA(seurat_phase)
## PC_ 1
## Positive: Cd79a, H2-Eb1, H2-Aa, Cd74, Ebf1, H2-DMb2, Ms4a1, Ly6d, H2-Ob, Fcmr
## Cd79b, Cd19, Mef2c, Tnfrsf13c, Bank1, H2-Oa, Vpreb3, Ly86, Blk, Cd22
## Unc93b1, Cd24a, H2-DMa, Ifi30, Ctsh, Blnk, Scd1, Ly6a, Fcer2a, Serinc3
## Negative: Cd3g, Nkg7, Cd3e, Ccl5, Cd7, Ctsw, Cd3d, Cd8a, Prkch, Cst7
## Itgae, Gzmb, Klrd1, Gzma, Dapk2, Il2rb, Lck, Cd96, Skap1, Zap70
## Chn2, Fgl2, Ikzf2, Fcer1g, Id2, Rgs1, Cd244, Rinl, Actn2, Ccnd2
## PC_ 2
## Positive: Ptprcap, Gimap6, Gimap4, Ikzf3, Gimap3, mt-Nd4l, Gimap7, mt-Atp8, mt-Nd1, Shisa5
## mt-Nd2, mt-Atp6, Malat1, Hvcn1, mt-Cytb, mt-Co2, mt-Co3, mt-Co1, Tmsb10, Bcl2
## Cd79b, Lck, Ebf1, Cd79a, Ltb, Ms4a1, Fcmr, Dtx1, H2-Q6, Cd3g
## Negative: Ifitm2, Lyz2, Tnfaip2, Cd300a, Pla2g7, AF251705, Lst1, Pilra, Tgfbi, Mpeg1
## Csf1r, Ifitm3, Fcgr4, Ms4a6c, Clec4n, Cd68, Wfdc17, Igsf6, Lrrc25, Pilrb2
## Il1b, Fcgr3, Alox5ap, Aif1, Csf2rb, App, Ccr1, Sirpa, Ccl6, Plbd1
## PC_ 3
## Positive: Apol7c, Dnase1l3, Cxcl16, Aif1, Xlr, Apol10b, Rgs10, Clec9a, Clec1b, Ctla2b
## Sult1a1, Ms4a7, Spint1, Ppfia4, Tnni2, C1qa, Batf3, Hfe, Cadm1, Ltc4s
## C1qc, Aldh1a2, Tifab, C1qb, Mgl2, Cystm1, Npl, Adamdec1, Bvht, Atpif1
## Negative: Cxcr2, Hdc, S100a8, S100a9, Slfn4, Clec4d, Irg1, Clec4e, Malat1, Csf3r
## Trem1, Hp, Ifitm1, G0s2, Hcar2, Trem3, Ptafr, Sirpb1b, Slfn1, Slc7a11
## Cxcl2, Ier3, Il1f9, Arg2, Emilin2, Upp1, Ptgs2, C3, Retnlg, Ccl6
## PC_ 4
## Positive: C1qc, C1qb, C1qa, Csf1r, Ms4a7, Gbp2b, Adamdec1, Mafb, Ms4a4a, Npl
## Msr1, C3ar1, Ptgs1, mt-Atp6, mt-Atp8, Ecm1, Pf4, P2ry6, mt-Co1, Cx3cr1
## mt-Co2, Fcgr1, Ms4a6d, Pla2g2d, Mmp14, mt-Co3, Ctsc, mt-Nd4l, Clec4a1, Tgfbi
## Negative: Jchain, Sdc1, Slpi, Derl3, Tnni2, Clec9a, Eaf2, Prg2, Aldh1a2, Cks1b
## Birc5, 2810417H13Rik, Ube2c, Ccr10, Txn1, Tnfrsf17, Ccna2, Glipr1, Cdca3, Ell2
## Stmn1, Spc24, Cdk1, Prdx4, Basp1, Creld2, Ctla2b, Fkbp11, Tpm2, Dstn
## PC_ 5
## Positive: Tmsb4x, Tnni2, Actg1, Clec9a, Ctla2b, Aldh1a2, Tmsb10, Serpinb6b, Anxa1, Tpm2
## Cxx1c, Ppp1r14a, Btg1, Xcr1, Gm9844, Hfe, S100a11, Pak1, Gm11837, Bcl2l14
## Vwf, Sult1a1, Ifi205, Bvht, Bcl2a1a, Bcl2a1d, Xlr, Crip1, Ccl22, Serpinb9
## Negative: Sdc1, Tmem176b, Tmem176a, Derl3, Prg2, Tnfrsf17, Ccr10, Trf, Eaf2, Selm
## Fkbp11, Prdx4, Slpi, Ell2, Kcnn4, Edem2, Lman1, Edem1, Pls1, Fam46c
## Ckap4, Txndc5, Ly6c2, Pdia4, Txndc11, H13, Jchain, Fkbp2, Xbp1, Cd93
# Plot the PCA colored by cell cycle phase
DimPlot(seurat_phase,
reduction = "pca",
group.by= "Phase",
split.by = "Phase")
It doesn't seem like the cells have cell-cycle specific clustering pattern so we can proceed to SCT transformation without regressing out cell cycle effects.
## no clear clustering based on cell cycle was observed so we can move on to SCTransform
options(future.globals.maxSize = 4000 * 1024^2)
# Split seurat object by condition to perform cell cycle scoring and SCT on all samples
split_seurat <- SplitObject(filtered_seurat, split.by = "sample")
suppressMessages(suppressWarnings(for (i in 1:length(split_seurat)) {
split_seurat[[i]] <- NormalizeData(split_seurat[[i]], verbose = FALSE)
split_seurat[[i]] <- CellCycleScoring(split_seurat[[i]], g2m.features=g2m_genes, s.features=s_genes, , verbose = FALSE)
split_seurat[[i]] <- SCTransform(split_seurat[[i]], vars.to.regress = c("mitoRatio"), , verbose = FALSE)
}))
This is to integrate all samples so they can be visualized in the same space and also remove batch effects across samples, assuming that they are from different batches.
# Select the most variable features to use for integration
integ_features <- SelectIntegrationFeatures(object.list = split_seurat,
nfeatures = 3000)
# Prepare the SCT list object for integration
split_seurat <- PrepSCTIntegration(object.list = split_seurat,
anchor.features = integ_features)
# Find best buddies - can take a while to run
suppressMessages(integ_anchors <- FindIntegrationAnchors(object.list = split_seurat,
normalization.method = "SCT",
anchor.features = integ_features))
# Integrate across samples
suppressMessages(seurat_integrated <- IntegrateData(anchorset = integ_anchors,
normalization.method = "SCT"))
## Warning: Adding a command log without an assay associated with it
seurat_integrated@meta.data$sample <- factor(seurat_integrated@meta.data$sample, levels = c("Homeostasis-M1", "Homeostasis-M2", "Homeostasis-M3", "HPDay4-M1", "HPDay4-M2", "HPDay4-M3"))
seurat_integrated@meta.data$condition <- factor(seurat_integrated@meta.data$condition, levels = c("Homeostasis", "HPDay4"))
# Save integrated seurat object
saveRDS(seurat_integrated, "Results/integrated_seurat.rds")
# Run PCA
seurat_integrated <- RunPCA(object = seurat_integrated)
## PC_ 1
## Positive: Lyz2, Apol7c, C1qb, C1qa, Aif1, C1qc, Csf1r, Ifitm2, Apoe, Ms4a7
## Pla2g7, AF251705, Tgfbi, Ms4a6c, Cxcl16, Clec4n, Cd68, Dnase1l3, Lst1, Cd63
## Adamdec1, Fcgr4, Mpeg1, Mafb, Fcgr3, Pilra, Npl, Sepp1, Tifab, Plbd1
## Negative: Ccl5, Cd7, Cd3g, Nkg7, Gzma, Cd3e, Gzmb, Cd8a, Cd3d, Ctsw
## Lck, Cst7, Il2rb, Xcl1, Dapk2, Klrd1, Cd160, Ikzf2, Skap1, Itgae
## Lat, Ltb, Cd79a, Tnfrsf9, Cd79b, Thy1, Ms4a4b, Klra5, Ddc, Ccnd2
## PC_ 2
## Positive: Ccl5, Cd7, Cd3g, Gzma, Nkg7, Fcer1g, Cd3e, Gzmb, Cd8a, Cd3d
## Ctsw, AW112010, Klrd1, Dapk2, Cst7, Il2rb, Xcl1, Tyrobp, Cd160, Itgae
## Fgl2, Skap1, Lck, Fyb, Ikzf2, Car2, Ccnd2, Asb2, Amica1, Rgs1
## Negative: Cd74, Cd79a, H2-Aa, H2-Eb1, Ly6d, Ms4a1, Cd24a, Cd79b, Fcmr, Vpreb3
## H2-DMb2, Ebf1, Mzb1, Ly6a, H2-Ob, Tnfrsf13c, Cd19, Ifi30, Mef2c, Jchain
## Ifi27l2a, H2-Oa, Blk, Pkig, Bank1, Ly86, Blnk, Fcrla, H2-Eb2, Siglecg
## PC_ 3
## Positive: Ckb, Alox5ap, Clec9a, Serpinb6b, Actn1, Ttc39a, Xcr1, Ifitm6, Tbc1d9, Dapk1
## Tnni2, Ppp1r14a, Cxx1c, Pak1, A530099J19Rik, Ctla2b, Plet1, Cst3, Adam8, Ppp1r1a
## Ifi205, Ffar4, Aldh1a2, Anpep, Atf3, Lgals3, Tpm2, Hfe, Plpp1, Anxa1
## Negative: C1qb, Apoe, C1qc, C1qa, Ms4a7, Adamdec1, Npl, Sepp1, Csf1r, Gbp2b
## Cd79a, Ms4a4a, Cd63, Mafb, Ly6d, Cd79b, Pla2g7, Clec4n, Ccl24, Ptgs1
## Tgfbi, Fcmr, Pf4, Ms4a1, Fcgr1, Fcgr4, Tmem176b, Vpreb3, Ebf1, Lgmn
## PC_ 4
## Positive: Ccl6, S100a6, Emilin2, Hp, Alox5ap, Dusp1, Sirpb1b, Gda, Sirpb1c, Il1b
## Csf3r, Sirpb1a, Nr4a1, Ifitm6, Msrb1, Ifitm2, Cebpb, Gm9733, Slfn1, Fos
## Ccrl2, Trem3, Trib1, Anxa2, Rsad2, Clec4a3, Cd300ld, Krt80, Cxcl2, Ltb4r1
## Negative: Apol7c, Dnase1l3, Cxcl16, Ttc39a, Xcr1, Clec9a, Rgs10, C1qa, C1qb, Aif1
## C1qc, Ltc4s, Ctla2b, Cadm1, 2810417H13Rik, A530099J19Rik, Birc5, Cd63, Plpp1, Shtn1
## Serpinb6b, Spint1, Sult1a1, Rab7b, Xlr, Tpm2, Clec1b, Ckb, Apol10b, Mt1
## PC_ 5
## Positive: 2810417H13Rik, Birc5, Ccna2, Mki67, Jchain, Ube2c, Cdca3, Top2a, Rgs13, Stmn1
## Rrm2, Cdk1, Cdca8, Ccnb2, Cenpf, Cdkn3, Kif2c, Hmmr, Spc24, Ccnb1
## Cdc20, Prc1, Cks1b, Aicda, Cks2, Ms4a4b, Thy1, Cd8b1, Lipc, Nuggc
## Negative: Gzma, Gzmb, H2-Aa, Cd74, H2-DMb2, H2-Eb1, Tyrobp, Cd7, Ly6d, H2-Oa
## Cd79a, Bank1, Lgals3, Fcer1g, Dapk2, Fcmr, H2-Ob, Pxdc1, Ebf1, Serpinb1a
## Ifi30, H2-DMa, Cd22, Cd79b, Snn, Vpreb3, Itgae, Plac8, H2-Eb2, Ms4a1
colnames(seurat_integrated@meta.data)[1] <- "seq_folder"
# Plot PCA
PCAPlot(seurat_integrated,
split.by = "condition",
group.by = "condition")
# Run UMAP
suppressMessages(seurat_integrated <- RunUMAP(seurat_integrated,
dims = 1:40,
reduction = "pca"))
# Plot UMAP
DimPlot(seurat_integrated, group.by = "condition")
DimPlot(seurat_integrated,
split.by = "condition",
group.by = "condition")
Now the position of cells on the map is determined, we can move on to determine the clusters and their identity.
# Explore heatmap of PCs
DimHeatmap(seurat_integrated,
dims = 1:9,
cells = 500,
balanced = TRUE)
print(x = seurat_integrated[["pca"]],
dims = 1:10,
nfeatures = 5)
## PC_ 1
## Positive: Lyz2, Apol7c, C1qb, C1qa, Aif1
## Negative: Ccl5, Cd7, Cd3g, Nkg7, Gzma
## PC_ 2
## Positive: Ccl5, Cd7, Cd3g, Gzma, Nkg7
## Negative: Cd74, Cd79a, H2-Aa, H2-Eb1, Ly6d
## PC_ 3
## Positive: Ckb, Alox5ap, Clec9a, Serpinb6b, Actn1
## Negative: C1qb, Apoe, C1qc, C1qa, Ms4a7
## PC_ 4
## Positive: Ccl6, S100a6, Emilin2, Hp, Alox5ap
## Negative: Apol7c, Dnase1l3, Cxcl16, Ttc39a, Xcr1
## PC_ 5
## Positive: 2810417H13Rik, Birc5, Ccna2, Mki67, Jchain
## Negative: Gzma, Gzmb, H2-Aa, Cd74, H2-DMb2
## PC_ 6
## Positive: Gzma, Gzmb, Fcer1g, Tyrobp, Cd7
## Negative: Ms4a4b, Cd8b1, Thy1, Lat, Ms4a6b
## PC_ 7
## Positive: Cxcl2, Ptgs2, Fos, Tnfaip2, Hdc
## Negative: Sirpb1a, Ccl9, Lrp1, Sirpb1c, Ccr2
## PC_ 8
## Positive: Lgals4, Krt8, Phgr1, Lgals2, Krt19
## Negative: Cd8b1, Ms4a4b, Cd74, H2-Aa, H2-Eb1
## PC_ 9
## Positive: Ifi205, Clec9a, Ttc39a, A530099J19Rik, Xcr1
## Negative: Plet1, Cd209a, Mt1, Ifitm6, Atf3
## PC_ 10
## Positive: Jchain, Pycr1, Derl3, Bhlha15, Cacna1s
## Negative: Ifit3, Ifit1, 2810417H13Rik, Birc5, Slfn5
# Plot the elbow plot
ElbowPlot(object = seurat_integrated,
ndims = 50)
# Determine the K-nearest neighbor graph
seurat_integrated <- FindNeighbors(object = seurat_integrated,
dims = 1:40)
## Computing nearest neighbor graph
## Computing SNN
# Determine the clusters for various resolutions
resoluations <- c(0.2, 0.4, 0.6, 0.8, 1.0, 1.4)
suppressMessages(seurat_integrated <- FindClusters(object = seurat_integrated,
resolution = resoluations))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 13
## Elapsed time: 3 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9148
## Number of communities: 19
## Elapsed time: 3 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8903
## Number of communities: 22
## Elapsed time: 4 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8698
## Number of communities: 25
## Elapsed time: 3 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8541
## Number of communities: 27
## Elapsed time: 3 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 12296
## Number of edges: 641888
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8288
## Number of communities: 31
## Elapsed time: 3 seconds
# Explore resolutions
nclusters <- c()
for (i in (dim(seurat_integrated@meta.data)[2]-length(resoluations)) : (dim(seurat_integrated@meta.data)[2]-1)) {
nclusters <- c(nclusters, length(unique(seurat_integrated@meta.data[,i])))
}
names(nclusters) <- resoluations
nclusters <- as.data.frame(nclusters)
nclusters$resolution <- rownames(nclusters)
rownames(nclusters) <- NULL
print(nclusters)
## nclusters resolution
## 1 13 0.2
## 2 19 0.4
## 3 22 0.6
## 4 25 0.8
## 5 27 1
## 6 31 1.4
Based on the Biton et al. 2018 paper, they identified 12 cell types. In the spirit of over-clustering, I picked resolution 0.4, which identified 19 clusters.
But before I look at my own clusters, I want to see how the original annotations match up with the new UMAP.
Idents(object = seurat_integrated) <- "orig.annotation"
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 4)
# Assign identity of clusters using 0.4 resoluation
Idents(object = seurat_integrated) <- "integrated_snn_res.0.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)
QC the clusters
# Extract identity and sample information from seurat object to determine the number of cells per cluster per sample
n_cells <- FetchData(seurat_integrated,
vars = c("ident", "sample")) %>%
dplyr::count(ident, sample) %>%
tidyr::spread(ident, n)
n_cells
## sample 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 1 Homeostasis-M1 826 6 70 54 94 36 68 101 9 33 12 50 40 8 26 9 5 9 8
## 2 Homeostasis-M2 917 6 95 108 205 45 19 127 38 46 13 57 56 5 45 5 9 5 12
## 3 Homeostasis-M3 1742 32 163 560 144 145 181 81 23 19 3 43 34 19 32 29 5 11 15
## 4 HPDay4-M1 49 831 194 27 144 154 114 77 116 156 76 42 19 53 9 12 16 8 NA
## 5 HPDay4-M2 61 297 132 23 57 169 75 87 134 110 140 45 44 59 15 11 24 14 NA
## 6 HPDay4-M3 61 774 267 27 115 160 111 64 147 100 93 81 18 51 13 32 20 10 NA
# UMAP of cells in each cluster by sample
DimPlot(seurat_integrated,
label = TRUE,
split.by = "condition") + NoLegend()
# Explore whether clusters segregate by cell cycle phase
DimPlot(seurat_integrated,
label = TRUE,
split.by = "Phase") + NoLegend()
# Determine metrics to plot present in seurat_integrated@meta.data
metrics <- c("nUMI", "nGene", "S.Score", "G2M.Score", "mitoRatio")
FeaturePlot(seurat_integrated,
reduction = "umap",
features = metrics,
pt.size = 0.4,
order = TRUE,
min.cutoff = 'q10',
label = TRUE)
# Defining the information in the seurat object of interest
columns <- c(paste0("PC_", 1:16),
"ident",
"UMAP_1", "UMAP_2")
# Extracting this data from the seurat object
pc_data <- FetchData(seurat_integrated,
vars = columns)
# Adding cluster label to center of cluster on UMAP
umap_label <- FetchData(seurat_integrated,
vars = c("ident", "UMAP_1", "UMAP_2")) %>%
group_by(ident) %>%
summarise(x=mean(UMAP_1), y=mean(UMAP_2))
## `summarise()` ungrouping output (override with `.groups` argument)
# Plotting a UMAP plot for each of the PCs
map(paste0("PC_", 1:16), function(pc){
ggplot(pc_data,
aes(UMAP_1, UMAP_2)) +
geom_point(aes_string(color=pc),
alpha = 0.7) +
scale_color_gradient(guide = FALSE,
low = "grey90",
high = "blue") +
geom_text(data=umap_label,
aes(label=ident, x, y)) +
ggtitle(pc)
}) %>%
plot_grid(plotlist = .)
Since this is a published dataset, I will use the markers that were used in the Biton et al. 2018 paper.
Here is the description of the markers they used: "In the case of the CD45+ immune cells, the following markers were used to interpret clusters post hoc. B cells: Bank1, Fcer2a, Cd79a, Cd79b, Cd22. B cell (cycling): Mki67, Cd79a, Cd79b, Ms4a1, Pou2af1. Cd4+ T helper cells: Cd4, Foxp3, Gata3, Cd3g, Tnfrsf4. Cd8 T cells: Gzma, Gzmb, Cd8a. Dendritic cells (DCs): Itgae, Tlr3, Cd209a, Cd209b, Irf8, H2-Ab1, Clec9a. Epithelial: Epcam, Krt8, Vil1, Muc2, Krt20. Inflammatory monocytes: Ccr2, Ly6c2, Fcgr1, Ifitm3, Fn1. Neutrophils: S100a9, S100a8, Csf3r, Cxcr2, Msrb1, Ccrl2. Natural Killer (NK) cells: Ncr1, Prf1, Klra1, Xcl1. Plasmacytoid dendritic cells: Siglech, Bst2, Tlr7. Plasma cell: Jchain, Mzb1, Xbp1, Sdc1."
DimPlot(object = seurat_integrated,
reduction = "umap",
label = TRUE) + NoLegend()
# Select the RNA counts slot to be the default assay
DefaultAssay(seurat_integrated) <- "RNA"
# Normalize RNA data for visualization purposes
seurat_integrated <- NormalizeData(seurat_integrated, verbose = FALSE)
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Bank1", "Fcer2a", "Cd79a", "Cd79b", "Cd22"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 0,3,4,14.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Mki67", "Cd79a", "Cd79b", "Ms4a1", "Pou2af1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 9.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Cd4", "Foxp3", "Gata3", "Cd3g", "Tnfrsf4"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 12.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Gzma", "Gzmb", "Cd8a"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 12.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Itgae", "Tlr3", "Cd209a", "Cd209b", "Irf8", "H2-Ab1", "Clec9a"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 8,11,16.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Epcam", "Krt8", "Vil1", "Muc2", "Krt20"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 15.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Ccr2", "Ly6c2", "Fcgr1", "Ifitm3", "Fn1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
## Warning in FetchData(object = object, vars = c(dims, "ident", features), : The following requested variables were not
## found: Ifitm3
This identifies cluster 13.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("S100a9", "S100a8", "Csf3r", "Cxcr2", "Msrb1", "Ccrl2"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 11.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Ncr1", "Prf1", "Klra1", "Xcl1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 2)
This identifies cluster 17.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Siglech", "Mzb1", "Tlr7", "Sdc1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 18.
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Jchain", "Bst2", "Xbp1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
This identifies cluster 10.
We will try to identify markers from the un-identified clusters: 7 and see if these markers can help us identify this cluster.
7 is likely M2 Macrophages since it is labeled as such but the paper didn't actually give me the marker genes they used to identify this cluster.
# Identification of conserved markers in all conditions
DefaultAssay(seurat_integrated) <- "RNA"
gene_info <- read_csv("Data/MRK_Sequence.csv")
## Parsed with column specification:
## cols(
## `MGI Marker Accession ID` = col_character(),
## `Marker Symbol` = col_character(),
## Status = col_character(),
## `Marker Type` = col_character(),
## `Marker Name` = col_character(),
## `cM position` = col_character(),
## Chromosome = col_character(),
## `Genome Coordinate Start` = col_double(),
## `Genome Coordinate End` = col_double(),
## Strand = col_character(),
## `GenBank IDs` = col_character(),
## `RefSeq transcript IDs` = col_character(),
## `Ensembl transcript IDs` = col_character(),
## `UniProt IDs` = col_character(),
## `TrEMBL IDs` = col_character(),
## `Ensembl protein IDs` = col_character(),
## `RefSeq protein IDs` = col_character(),
## `UniGene IDs` = col_double(),
## `Feature Type` = col_character()
## )
## Warning: 14 parsing failures.
## row col expected actual file
## 6166 UniGene IDs no trailing characters |205125 'Data/MRK_Sequence.csv'
## 6548 UniGene IDs no trailing characters |173249 'Data/MRK_Sequence.csv'
## 6856 UniGene IDs no trailing characters |234565 'Data/MRK_Sequence.csv'
## 6867 UniGene IDs no trailing characters |463312 'Data/MRK_Sequence.csv'
## 9026 UniGene IDs no trailing characters |252811 'Data/MRK_Sequence.csv'
## .... ........... ...................... ....... .......................
## See problems(...) for more details.
# Create function to get conserved markers for any given cluster
get_conserved <- function(cluster){
FindConservedMarkers(seurat_integrated,
ident.1 = cluster,
grouping.var = "condition",
only.pos = TRUE,
logfc.threshold = 0.25) %>%
rownames_to_column(var = "gene") %>%
left_join(y = unique(gene_info[, c("Marker Symbol", "Marker Name")]),
by = c("gene" = "Marker Symbol")) %>%
cbind(cluster_id = cluster, .)
}
# Iterate function across desired clusters
conserved_markers <- map_dfr(c(7), get_conserved)
## Testing group HPDay4: (7) vs (2, 1, 10, 9, 0, 8, 6, 12, 5, 13, 4, 16, 3, 11, 15, 17, 14)
## Testing group Homeostasis: (7) vs (0, 3, 6, 8, 4, 9, 5, 18, 10, 2, 11, 12, 1, 14, 15, 17, 13, 16)
# Save the marker genes
write_csv(conserved_markers, "Results/C7_conserved markers.csv")
We first look at the top 10 genes with highest averaged LFC ### Cluster 7
# Extract top 10 markers per cluster
top10 <- conserved_markers %>%
mutate(avg_lfc = (HPDay4_avg_logFC + Homeostasis_avg_logFC) /2) %>%
group_by(cluster_id) %>%
top_n(n = 10,
wt = avg_lfc)
# Visualize top 10 markers per cluster
top10
## # A tibble: 10 x 16
## # Groups: cluster_id [1]
## cluster_id gene HPDay4_p_val HPDay4_avg_logFC HPDay4_pct.1 HPDay4_pct.2 HPDay4_p_val_adj Homeostasis_p_v…
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7 C1qb 0. 2.24 0.715 0.017 0. 0.
## 2 7 C1qc 0. 2.20 0.728 0.013 0. 0.
## 3 7 C1qa 0. 2.13 0.684 0.016 0. 0.
## 4 7 Ms4a… 0. 1.69 0.785 0.028 0. 0.
## 5 7 Tgfbi 0. 1.63 0.798 0.054 0. 0.
## 6 7 Mafb 0. 1.60 0.689 0.018 0. 0.
## 7 7 Csf1r 0. 1.54 0.754 0.046 0. 0.
## 8 7 Aif1 6.15e-279 1.64 0.93 0.113 9.22e-275 0.
## 9 7 Lyz2 8.27e-242 1.94 0.982 0.177 1.24e-237 0.
## 10 7 Apoe 2.14e-232 1.90 0.75 0.088 3.21e-228 1.05e-303
## # … with 8 more variables: Homeostasis_avg_logFC <dbl>, Homeostasis_pct.1 <dbl>, Homeostasis_pct.2 <dbl>,
## # Homeostasis_p_val_adj <dbl>, max_pval <dbl>, minimump_p_val <dbl>, `Marker Name` <chr>, avg_lfc <dbl>
Csf1r and Lyz2 are a good marker of macrophage. Mafb, Ms4a4a and Aif1 are a good marker for M2 macrophage. MS4A4A: a novel cell surface marker for M2 macrophages and plasma cells Colony-stimulating factor-1-induced AIF1 expression in tumor-associated macrophages enhances the progression of hepatocellular carcinoma The transcription factor MafB promotes anti-inflammatory M2 polarization and cholesterol efflux in macrophages The role of CSF1R-dependent macrophages in control of the intestinal stem-cell niche
FeaturePlot(seurat_integrated,
reduction = "umap",
features = c("Csf1r", "Lyz2", "Mafb", "Ms4a4a", "Aif1"),
order = TRUE,
min.cutoff = 'q10',
label = TRUE,
ncol = 3)
So cluster 7 is indeed M2 macrophages.
The original annotation actually worked quite well. UMAP clustering was slightly off compared to the original clystering, possibly due to the reduction of cells (I only used half of the cells for this analysis since I didn't use the MHCII-- cells). So I will be using the original annotation for the following analysis.
Idents(object = seurat_integrated) <- "orig.annotation"
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 4,
repel = TRUE)
DimPlot(object = seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 3,
repel = TRUE,
split.by = "condition") + NoLegend()
# Save final R object
write_rds(seurat_integrated,
path = "Results/seurat_labelled.rds")
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 ensembldb_2.12.1 AnnotationFilter_1.12.0 GenomicFeatures_1.40.0
## [5] AnnotationDbi_1.50.0 AnnotationHub_2.20.0 BiocFileCache_1.12.0 dbplyr_1.4.4
## [9] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.1 DelayedArray_0.14.0 matrixStats_0.56.0
## [13] Biobase_2.48.0 GenomicRanges_1.40.0 GenomeInfoDb_1.24.0 IRanges_2.22.2
## [17] S4Vectors_0.26.1 BiocGenerics_0.34.0 cowplot_1.0.0 scales_1.1.1
## [21] RCurl_1.98-1.2 Matrix_1.2-18 forcats_0.5.0 stringr_1.4.0
## [25] dplyr_1.0.0 purrr_0.3.4 readr_1.3.1 tidyr_1.1.0
## [29] tibble_3.0.1 ggplot2_3.3.1 tidyverse_1.3.0 Seurat_3.1.5
##
## loaded via a namespace (and not attached):
## [1] utf8_1.1.4 reticulate_1.16 tidyselect_1.1.0
## [4] RSQLite_2.2.0 htmlwidgets_1.5.1 grid_4.0.0
## [7] BiocParallel_1.22.0 Rtsne_0.15 munsell_0.5.0
## [10] codetools_0.2-16 mutoss_0.1-12 ica_1.0-2
## [13] future_1.17.0 withr_2.2.0 colorspace_1.4-1
## [16] knitr_1.28 rstudioapi_0.11 ROCR_1.0-11
## [19] gbRd_0.4-11 listenv_0.8.0 Rdpack_0.11-1
## [22] labeling_0.3 GenomeInfoDbData_1.2.3 mnormt_2.0.0
## [25] bit64_0.9-7 farver_2.0.3 vctrs_0.3.1
## [28] generics_0.0.2 TH.data_1.0-10 xfun_0.14
## [31] R6_2.4.1 rsvd_1.0.3 bitops_1.0-6
## [34] assertthat_0.2.1 promises_1.1.1 multcomp_1.4-13
## [37] gtable_0.3.0 globals_0.12.5 sandwich_2.5-1
## [40] rlang_0.4.6 splines_4.0.0 rtracklayer_1.48.0
## [43] lazyeval_0.2.2 broom_0.5.6 BiocManager_1.30.10
## [46] yaml_2.2.1 reshape2_1.4.4 modelr_0.1.8
## [49] backports_1.1.7 httpuv_1.5.4 tools_4.0.0
## [52] ellipsis_0.3.1 RColorBrewer_1.1-2 ggridges_0.5.2
## [55] TFisher_0.2.0 Rcpp_1.0.4.6 plyr_1.8.6
## [58] progress_1.2.2 zlibbioc_1.34.0 prettyunits_1.1.1
## [61] openssl_1.4.1 pbapply_1.4-2 zoo_1.8-8
## [64] haven_2.3.1 ggrepel_0.8.2 cluster_2.1.0
## [67] fs_1.4.1 magrittr_1.5 data.table_1.12.8
## [70] RSpectra_0.16-0 lmtest_0.9-37 reprex_0.3.0
## [73] RANN_2.6.1 tmvnsim_1.0-2 mvtnorm_1.1-1
## [76] ProtGenerics_1.20.0 fitdistrplus_1.1-1 evaluate_0.14
## [79] hms_0.5.3 patchwork_1.0.0 mime_0.9
## [82] xtable_1.8-4 XML_3.99-0.3 readxl_1.3.1
## [85] compiler_4.0.0 biomaRt_2.44.0 KernSmooth_2.23-17
## [88] crayon_1.3.4 htmltools_0.4.0 mgcv_1.8-31
## [91] later_1.1.0.1 lubridate_1.7.9 DBI_1.1.0
## [94] MASS_7.3-51.6 rappdirs_0.3.1 cli_2.0.2
## [97] metap_1.3 igraph_1.2.5 pkgconfig_2.0.3
## [100] sn_1.6-2 GenomicAlignments_1.24.0 numDeriv_2016.8-1.1
## [103] plotly_4.9.2.1 xml2_1.3.2 multtest_2.44.0
## [106] XVector_0.28.0 bibtex_0.4.2.2 rvest_0.3.5
## [109] digest_0.6.25 sctransform_0.2.1 RcppAnnoy_0.0.16
## [112] tsne_0.1-3 Biostrings_2.56.0 rmarkdown_2.2
## [115] cellranger_1.1.0 leiden_0.3.3 uwot_0.1.8
## [118] curl_4.3 shiny_1.4.0.2 Rsamtools_2.4.0
## [121] lifecycle_0.2.0 nlme_3.1-148 jsonlite_1.6.1
## [124] limma_3.44.2 viridisLite_0.3.0 askpass_1.1
## [127] fansi_0.4.1 pillar_1.4.4 lattice_0.20-41
## [130] plotrix_3.7-8 fastmap_1.0.1 httr_1.4.1
## [133] survival_3.1-12 interactiveDisplayBase_1.26.3 glue_1.4.1
## [136] png_0.1-7 BiocVersion_3.11.1 bit_1.1-15.2
## [139] stringi_1.4.6 blob_1.2.1 memoise_1.1.0
## [142] irlba_2.3.3 future.apply_1.5.0 ape_5.4